home *** CD-ROM | disk | FTP | other *** search
Text File | 1997-06-26 | 4.2 KB | 167 lines | [TEXT/CWIE] |
- /*------------------------------------------------------------------------------
- #
- # NewsTicker, my Hack for 1997
- #
- # NewsComExtractor.h - Derived from HTMLExtractor, we get passed the tokens
- # and try to recognize headlines out of it. We parse
- # the page "www.news.com", the technical news page of
- # c|net.com.
- #
- ------------------------------------------------------------------------------*/
- #include <string.h>
-
- #include "TickerGlobals.h"
- #include "NewsComExtractor.h"
- #include "HTMLExtractor.h"
-
- // Refresh every 20 minutes
-
- #define kNewsPeriod 1200
- #define kNewsAddress "www.news.com"
-
- // Globals for the News.Com extractor
-
- unsigned long gNewsNextTime = 0;
-
- class NEWSExtractor: public HTMLExtractor
- {
- protected:
- enum NewsParser { knpParsing,
- //text headlines are <f><strong><a>headline
- knpHasFont, knpHasStrong, knpHasLinkAndStrong,
- knpHasAllForText, knpWaitingForParagraph,
- //Graphic headlines are <a><img>
- knpHasLink };
-
- NewsParser mfCurrentState;
- Str255 mfTheURL;
- Boolean mfInTD;
-
- public:
- NEWSExtractor(sMyDataPtr theDataPtr);
- virtual ~NEWSExtractor (void){ }
-
- virtual void HandleToken(char* string, short numchars, Boolean isCommand);
- };
-
- //
- // We just parse the entries to find the element
- //
- NEWSExtractor::NEWSExtractor(sMyDataPtr theDataPtr)
- :HTMLExtractor(kNewsAddress, 1000, theDataPtr)
- {
- unsigned long now;
-
- mfCurrentState = knpParsing; //just waiting for our thing to come through
- mfInTD = false;
-
- GetDateTime(&now);
- gNewsNextTime = now + kNewsPeriod; //refresh the news every 20 minutes
- }
-
- void NEWSExtractor::HandleToken(char* string, short numchars, Boolean isCommand)
- {
- Str255 thestr;
-
- if (isCommand)
- {
- if (MyCompareStr(string, "<TD ")) //table delimiters mark the image links
- mfInTD = true;
- if (MyCompareStr(string, "</TD "))
- {
- mfInTD = false;
- mfCurrentState = knpParsing;
- }
-
- switch (mfCurrentState)
- {
- case knpParsing: //from nothing, we want font or A
- if (MyCompareStr(string, "<FONT "))
- mfCurrentState = knpHasFont;
-
- else if (MyCompareStr(string, "<A ")&&mfInTD)
- {
- if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
- {
- mfCurrentState = knpHasLink;
- }
- else mfCurrentState = knpParsing;
- }
- break;
- case knpHasFont: //for this, we only want strong
- if (MyCompareStr(string, "<STRONG>"))
- mfCurrentState = knpHasLinkAndStrong;
- else mfCurrentState = knpParsing;
- break;
- case knpHasLinkAndStrong: //for this, we only want <a>
- if (MyCompareStr(string, "<A "))
- {
- if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
- {
- mfCurrentState = knpHasAllForText;
- }
- else mfCurrentState = knpParsing;
- }
- else mfCurrentState = knpParsing;
- break;
- case knpHasAllForText:
- mfCurrentState = knpParsing; //any tag from this position is a failure
- break;
- case knpWaitingForParagraph: //OK, from now on, we're only waiting for a <P>
- if (MyCompareStr(string, "<P>"))
- mfCurrentState = knpParsing;
- break;
- case knpHasLink: //for this, we only want an img, if there's an alt text
- if (MyCompareStr(string, "<IMG "))
- {
- FindATag(string+4, (char*)&thestr[1], "ALT");
- thestr[0] = strlen( (char*)&thestr[1] );
- if (thestr[0]>0)
- AddEntry(thestr, mfTheURL);
- }
- mfCurrentState = knpParsing;
- break;
- }
- }
- else
- {
- if (mfCurrentState==knpHasAllForText) //OK, get got a headline!
- {
- if (numchars>255)
- numchars = 255;
- thestr[0] = numchars;
- BlockMove(string, &thestr[1], numchars);
-
- //Add the entry
- AddEntry(thestr, mfTheURL);
-
- mfCurrentState = knpWaitingForParagraph;
- }
- else if (mfCurrentState!=knpWaitingForParagraph)
- mfCurrentState = knpParsing; //and wait for tne next headline
- }
- }
-
-
- void LoadNewsCom(sMyDataPtr gGlobalsPtr)
- {
- NEWSExtractor* theparser = new NEWSExtractor(gGlobalsPtr);
-
- theparser->ReadEntries();
- delete theparser;
-
- InitCursor();
- }
-
- // Tells the caller whether the refresh period has elapsed and a reload is due
- Boolean MustReloadNewsCom(sMyDataPtr gGlobalsPtr)
- {
- unsigned long now;
-
- GetDateTime(&now);
-
- if (now<gNewsNextTime) //time to check yet?
- return false;
-
- return true; //always recheck on the time
- }